1 Background

We compare the inference results from TMB, aghq, adam, and tmbstan. Import these inference results as follows:

tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
adam <- readRDS("depends/adam.rds")
tmbstan <- readRDS("depends/tmbstan.rds")

depends <- yaml::read_yaml("orderly.yml")$depends

1.1 Run details

For more information about the conditions under which these results were generated, see:

1.1.1 TMB

dependency_details <- function(i) {
  report_name <- names(depends[[i]])
  print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
  report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
  print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
  print(orderly::orderly_info(report_id, report_name)$parameters)
}

dependency_details(1)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230119-142551-23704e7b and was run with the following parameters:"
## $tmb
## [1] TRUE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $ndConstruction
## [1] "product"
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000

1.1.2 aghq

dependency_details(2)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE && parameter:k == 1)"
## [1] "Obtained report had ID 20230119-170459-9b07786e and was run with the following parameters:"
## $aghq
## [1] TRUE
## 
## $k
## [1] 1
## 
## $ndConstruction
## [1] "product"
## 
## $tmb
## [1] FALSE
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000

1.1.3 adam

dependency_details(3)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:adam == TRUE)"
## [1] "Obtained report had ID 20230209-142804-11535a68 and was run with the following parameters:"
## $adam
## [1] TRUE
## 
## $tmb
## [1] FALSE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $ndConstruction
## [1] "product"
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000
## 
## $area_level
## [1] 4

1.1.4 tmbstan

dependency_details(4)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
## 
## $niter
## [1] 20000
## 
## $nthin
## [1] 20
## 
## $tmb
## [1] FALSE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $nsample
## [1] 1000

1.2 Time taken

data.frame(
  "TMB" = tmb$time,
  "aghq" = aghq$time,
  "adam" = adam$time,
  "tmbstan" = tmbstan$time
)
adam <- adam$adam

2 Histograms and empirical cumulative distribution plots

We create histograms and empirical cumulative distribution function (ECDF) plots of the samples from each method. All of the possible parameter names are as follows:

pars <- unique(names(tmb$fit$obj$env$par))

We will produce plots about the following subset of them. There is no particular reason to choose this subset rather than other, it’s quite arbitrary.

pars_eval <- pars %in% c("beta_rho", "beta_alpha", "beta_lambda", "beta_anc_rho", "beta_anc_alpha", "u_rho_x", "us_rho_x", "u_rho_xs")
names(pars_eval) <- pars
beta_rho <- histogram_and_ecdf("beta_rho", i = 1, return_df = TRUE)
saveRDS(beta_rho$df, "beta_rho.rds")

2.1 beta_rho

histogram_and_ecdf_helper <- function(par) lapply(1:sum(names(tmb$fit$obj$env$par) == par), histogram_and_ecdf, par = par)
histogram_and_ecdf_helper("beta_rho")
## [[1]]

## 
## [[2]]

2.2 beta_alpha

histogram_and_ecdf_helper("beta_alpha")
## [[1]]

## 
## [[2]]

2.3 beta_lambda

histogram_and_ecdf_helper("beta_lambda")
## [[1]]

## 
## [[2]]

2.4 beta_anc_rho

histogram_and_ecdf("beta_anc_rho")

2.5 beta_anc_alpha

histogram_and_ecdf("beta_anc_alpha")

2.6 logit

lapply(pars[stringr::str_starts(pars, "logit")], histogram_and_ecdf)

2.7 log_sigma

lapply(pars[stringr::str_starts(pars, "log_sigma")], histogram_and_ecdf)

2.8 u_rho_x

histogram_and_ecdf_helper("u_rho_x")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

2.9 u_rho_x

histogram_and_ecdf_helper("u_rho_x")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

2.10 us_rho_x

histogram_and_ecdf_helper("us_rho_x")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

2.11 u_rho_xs

histogram_and_ecdf_helper("u_rho_xs")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

2.12 us_rho_xs

histogram_and_ecdf_helper("us_rho_xs")

2.13 u_rho_as

histogram_and_ecdf_helper("u_rho_as")

2.14 u_alpha_x

histogram_and_ecdf_helper("u_alpha_x")

2.15 us_alpha_x

histogram_and_ecdf_helper("us_alpha_x")

2.16 u_alpha_xs

histogram_and_ecdf_helper("u_alpha_xs")

2.17 us_alpha_xs

histogram_and_ecdf_helper("us_alpha_xs")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

2.18 u_alpha_a

histogram_and_ecdf_helper("u_alpha_a")

2.19 u_alpha_as

histogram_and_ecdf_helper("u_alpha_as")

2.20 u_alpha_xa

histogram_and_ecdf_helper("u_alpha_xa")

2.21 ui_lambda_x

histogram_and_ecdf_helper("ui_lambda_x")

2.22 ui_anc_rho_x

histogram_and_ecdf_helper("ui_anc_rho_x")

2.23 ui_anc_alpha_x

histogram_and_ecdf_helper("ui_anc_alpha_x")

2.24 log_or_gamma

histogram_and_ecdf_helper("log_or_gamma")

3 KS plots

3.1 Individual parameters

ks_helper <- function(par, ...) {
  to_ks_df(par) %>% ks_plot(par, ...)
}

3.1.1 beta

ks_helper("beta")

ks_helper("beta", method1 = "TMB", method2 = "adam")

3.1.2 logit

ks_helper("logit")

ks_helper("logit", method1 = "TMB", method2 = "adam")

3.1.3 log_sigma

ks_helper("log_sigma")

ks_helper("log_sigma", method1 = "TMB", method2 = "adam")

3.1.4 u_rho_x

ks_helper("u_rho_x")

ks_helper("u_rho_x", method1 = "TMB", method2 = "adam")

3.1.5 u_rho_xs

ks_helper("u_rho_xs")

ks_helper("u_rho_xs", method1 = "TMB", method2 = "adam")

3.1.6 us_rho_x

ks_helper("us_rho_x")

ks_helper("us_rho_x", method1 = "TMB", method2 = "adam")

3.1.7 us_rho_xs

ks_helper("us_rho_xs")
ks_helper("us_rho_xs", method1 = "TMB", method2 = "adam")

3.1.8 u_rho_a

ks_helper("u_rho_a")
ks_helper("u_rho_a", method1 = "TMB", method2 = "adam")

3.1.9 u_rho_as

ks_helper("u_rho_as")
ks_helper("u_rho_as", method1 = "TMB", method2 = "adam")

3.1.10 u_alpha_x

ks_helper("u_alpha_x")
ks_helper("u_alpha_x", method1 = "TMB", method2 = "adam")

3.1.11 u_alpha_xs

ks_helper("u_alpha_xs")
ks_helper("u_alpha_xs", method1 = "TMB", method2 = "adam")

3.1.12 us_alpha_x

ks_helper("us_alpha_x")
ks_helper("us_alpha_x", method1 = "TMB", method2 = "adam")

3.1.13 us_alpha_xs

ks_helper("us_alpha_xs")
ks_helper("us_alpha_xs", method1 = "TMB", method2 = "adam")

3.1.14 u_alpha_a

ks_helper("u_alpha_a")
ks_helper("u_alpha_a", method1 = "TMB", method2 = "adam")

3.1.15 u_alpha_as

ks_helper("u_alpha_as")
ks_helper("u_alpha_as", method1 = "TMB", method2 = "adam")

3.1.16 u_alpha_xa

ks_helper("u_alpha_xa")
ks_helper("u_alpha_xa", method1 = "TMB", method2 = "adam")

3.1.17 ui_anc_rho_x

ks_helper("ui_anc_rho_x")
ks_helper("ui_anc_rho_x", method1 = "TMB", method2 = "adam")

3.1.18 ui_anc_alpha_x

ks_helper("ui_anc_alpha_x")
ks_helper("ui_anc_alpha_x", method1 = "TMB", method2 = "adam")

3.1.19 log_or_gamma

ks_helper("log_or_gamma")
ks_helper("log_or_gamma", method1 = "TMB", method2 = "adam")

3.2 Summary table

ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
  to_ks_df(x) %>%
    group_by(method) %>%
    summarise(ks = mean(ks), par = x)
}) %>%
  bind_rows() %>%
  pivot_wider(names_from = "method", values_from = "ks") %>%
  rename(
    "Parameter" = "par",
    "KS(adam, tmbstan)" = "adam",
    "KS(aghq, tmbstan)" = "aghq",
    "KS(TMB, tmbstan)" = "TMB",
  )

ks_summary %>%
  gt::gt() %>%
  gt::fmt_number(
    columns = starts_with("KS"),
    decimals = 3
  )
Parameter KS(adam, tmbstan) KS(aghq, tmbstan) KS(TMB, tmbstan)
beta_rho 0.091 0.090 0.085
beta_alpha 0.123 0.101 0.103
beta_lambda 0.042 0.061 0.074
beta_anc_rho 0.069 0.105 0.090
beta_anc_alpha 0.070 0.044 0.025
logit_phi_rho_x 0.536 0.536 0.118
log_sigma_rho_x 0.646 0.646 0.195
logit_phi_rho_xs 0.510 0.510 0.111
log_sigma_rho_xs 0.613 0.613 0.203
logit_phi_rho_a 0.552 0.552 0.080
log_sigma_rho_a 0.585 0.585 0.113
logit_phi_rho_as 0.569 0.569 0.104
log_sigma_rho_as 0.589 0.589 0.134
log_sigma_rho_xa 0.669 0.669 0.224
u_rho_x 0.090 0.092 0.095
us_rho_x 0.065 0.062 0.062
u_rho_xs 0.124 0.117 0.121
us_rho_xs 0.045 0.037 0.044
u_rho_a 0.090 0.087 0.083
u_rho_as 0.092 0.084 0.076
logit_phi_alpha_x 0.531 0.531 0.107
log_sigma_alpha_x 0.557 0.557 0.143
logit_phi_alpha_xs 0.551 0.551 0.143
log_sigma_alpha_xs 0.540 0.540 0.159
logit_phi_alpha_a 0.525 0.525 0.083
log_sigma_alpha_a 0.541 0.541 0.111
logit_phi_alpha_as 0.516 0.516 0.081
log_sigma_alpha_as 0.514 0.514 0.098
log_sigma_alpha_xa 0.627 0.627 0.146
u_alpha_x 0.096 0.096 0.089
us_alpha_x 0.087 0.084 0.083
u_alpha_xs 0.110 0.101 0.096
us_alpha_xs 0.102 0.097 0.095
u_alpha_a 0.084 0.085 0.084
u_alpha_as 0.112 0.096 0.102
u_alpha_xa 0.068 0.069 0.059
OmegaT_raw 0.518 0.518 0.022
log_betaT 0.689 0.689 0.244
log_sigma_lambda_x 0.736 0.736 0.269
ui_lambda_x 0.163 0.158 0.160
log_sigma_ancrho_x 0.504 0.504 0.037
log_sigma_ancalpha_x 0.519 0.519 0.083
ui_anc_rho_x 0.044 0.055 0.057
ui_anc_alpha_x 0.063 0.063 0.065
log_sigma_or_gamma 0.524 0.524 0.036
log_or_gamma 0.044 0.059 0.060
r <- adam$quad$obj$env$random
x_names <- names(adam$quad$obj$env$par[r])
theta_names <- names(adam$quad$obj$env$par[-r])

dict <- data.frame(
  Parameter = c(x_names, theta_names),
  Type = c(rep("Latent field", length(x_names)), rep("Hyperparameters", length(theta_names)))
)

ks_summary <- ks_summary %>%
  left_join(dict, by = "Parameter")

summary_ks_plot <- function(ks_summary, method1, method2) {
  ks_method1 <- paste0("KS(", method1, ", tmbstan)")
  ks_method2 <- paste0("KS(", method2, ", tmbstan)")
  
  scatterplot <- ggplot(ks_summary, aes(x = .data[[ks_method1]], y = .data[[ks_method2]])) +
    geom_point(alpha = 0.2) +
    annotate(geom = "polygon", x = c(-Inf, Inf, Inf), y = c(-Inf, Inf, -Inf), fill = multi.utils::cbpalette()[3], alpha = 0.1) +
    annotate(geom = "polygon", x = c(-Inf, Inf, -Inf), y = c(-Inf, Inf, Inf), fill = multi.utils::cbpalette()[5], alpha = 0.1) +
    xlim(0, 1) +
    ylim(0, 1) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    labs(x = ks_method1, y = ks_method2) +
    theme_minimal() 

  ks_summary[["KS difference"]] <- ks_summary[[ks_method1]] - ks_summary[[ks_method2]]
  
  jitterplot <- ggplot(ks_summary, aes(x = Type, y = `KS difference`)) +
    geom_jitter(width = 0.1, alpha = 0.2) +
    labs(x = "", y = paste0(ks_method1, " - ", ks_method2)) +
    theme_minimal()

  scatterplot + jitterplot
}

summary_ks_plot(ks_summary, "TMB", "aghq")

summary_ks_plot(ks_summary, "TMB", "adam")

summary_ks_plot(ks_summary, "aghq", "adam")

(ks_summary_out <- ks_summary %>%
  group_by(Type) %>%
  summarise(
    TMB = mean(`KS(TMB, tmbstan)`),
    aghq = mean(`KS(aghq, tmbstan)`),
    adam = mean(`KS(adam, tmbstan)`)
  ))

Save some things for output:

pdf("ks_summary_tmb_adam.pdf", h = 4, w = 6.25)
summary_ks_plot(ks_summary, "TMB", "adam")
dev.off()
## quartz_off_screen 
##                 2
saveRDS(ks_summary_out, "ks_summary_out.rds")

3.3 Investigation into large KS values

The following parameters have KS values greater than 0.5 in both dimensions.

(big_ks <- ks_summary %>%
  filter(`KS(aghq, tmbstan)` + `KS(TMB, tmbstan)` > 0.5) %>%
  pull(Parameter))
##  [1] "logit_phi_rho_x"      "log_sigma_rho_x"      "logit_phi_rho_xs"     "log_sigma_rho_xs"     "logit_phi_rho_a"      "log_sigma_rho_a"      "logit_phi_rho_as"     "log_sigma_rho_as"    
##  [9] "log_sigma_rho_xa"     "logit_phi_alpha_x"    "log_sigma_alpha_x"    "logit_phi_alpha_xs"   "log_sigma_alpha_xs"   "logit_phi_alpha_a"    "log_sigma_alpha_a"    "logit_phi_alpha_as"  
## [17] "log_sigma_alpha_as"   "log_sigma_alpha_xa"   "OmegaT_raw"           "log_betaT"            "log_sigma_lambda_x"   "log_sigma_ancrho_x"   "log_sigma_ancalpha_x" "log_sigma_or_gamma"

4 MMD

#' To write!

5 PSIS

Suppose we have two sets of samples from the posterior. For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios. We can extract the TMB objective function for the log-likelihood as follows:

tmb$fit$obj$fn()
## [1] 3327.999
## attr(,"logarithm")
## [1] TRUE
#' To write!